#####
# Replication for: "Can Political Speech Foster Tolerance of Immigrants?" by Schleiter, Tavits, and Ward.
# Table S.14
#####

library(here)
library(data.table)
library(xtable)

# source the custom functions
source("functions.R")

# load the pooled data if not already in workspace
if(!exists("pooled")){
  pooled <- fread(here("data", "pooled.csv"))
}

# create the trichotomous education variable
pooled[ , edu3 := factor(
  fcase(
    edu2 %in% c("Less than high school", "High school graduate/ GED"), "Low",
    edu2 %in% c("2 year degree", "Some college"), "Medium",
    edu2 %in% c("4 year degree", "Masters degree", "Professional Degree (JD, MD)", "Doctorate"), "High"
  ), levels = c("Low", "Medium", "High")
)]


# Helper functions --------------------------------------------------------

# These three functions do the starch of the analysis. The first fits all 9 models, the second extracts the 9 marginal effects per model. The third does the battery of p-values
allCovEdufits <- 
  function(y, samp, data = pooled, chatty = T){
    if(samp != "Replication"){
      form <- as.formula(paste0(y, "~ CH + NM + CS + CH*edu3 + NM*edu3 + CS*edu3 + female + age + I(age^2) + race2 + pid2 + news2 + region2 + factor(wave)"))
    } else {
      form <- as.formula(paste0(y, "~ CH + NM + CS + CH*edu3 + NM*edu3 + CS*edu3 + female + age + I(age^2) + race2 + pid2 + news2 + region2"))
    }
    
    sample_use <- c("Main" = "replication == 0", "Replication" = "replication == 1", "Pooled" = "")[samp]
    
    if(samp != "Pooled"){
      mod <- lm(formula = form, data = data[eval(parse(text = sample_use))])
    } else {
      mod <- lm(formula = form, data = data)
    }
    
    if(chatty) print(summary(mod))
    return(mod)
  }

edu_ME_extract <- 
  function(mod, level = .95){
    out <- expand.grid("treatment" = c("CH", "NM", "CS"), "edu" = c("L", "M", "H"), stringsAsFactors = F); setDT(out)
    
    out[edu == "L" , c("est", "lower", "upper") := .(
      coef(mod)[c("CH", "NM", "CS")],
      confint(mod, level = level)[c("CH", "NM", "CS"), 1],
      confint(mod, level = level)[c("CH", "NM", "CS"), 2]
    )]
    
    for(n in 4:9){
      vars <- c(out$treatment[n], paste0(out$treatment[n], ":edu3", ifelse(out$edu[n] == "M", "Medium", "High")))
      est <- sum(coef(mod)[vars])
      se <- sqrt(vcov(mod)[vars[1], vars[1]] + vcov(mod)[vars[2], vars[2]] + 2*vcov(mod)[vars[1], vars[2]])
      set(out, i = n, j = "est", value = est)
      set(out, i = n, j = "lower", value = est - 1.96*se)
      set(out, i = n, j = "upper", value = est + 1.96*se)
    }
    
    return(out)
  }

# p-values that I want:
# for each one, compare that to 0 (this is easy for I, hard for others)
# For each, compare I to D (just the signif. of the interaction)
# for each, compare I to R (just the signif. of the interaction)
# For each, compare D to R (test of significant differences between the interaction terms)

edu_pval_extract <-
  function(mod){
    out <- expand.grid("treatment" = c("CH", "NM", "CS"), "test" = c("L_v_0", "M_v_0", "H_v_0", "L_v_M", "L_v_H", "M_v_H"), stringsAsFactors = F); setDT(out)
    
    # knock out the ones that aren't marginal effects
    out[test == "L_v_0", p := summary(mod)$coefficients[treatment, 4]]
    out[test == "L_v_M", p := summary(mod)$coefficients[paste0(treatment, ":edu3Medium"), 4]]
    out[test == "L_v_H", p := summary(mod)$coefficients[paste0(treatment, ":edu3High"), 4]]
    
    # D_v_0 and R_v_0 are easy enough
    for(n in which(out$test %in% c("M_v_0", "H_v_0"))){
      vars <- c(out$treatment[n], paste0(out$treatment[n], ":edu3", ifelse(out$test[n] == "M_v_0", "Medium", "High")))
      est <- sum(coef(mod)[vars])
      se <- sqrt(vcov(mod)[vars[1], vars[1]] + vcov(mod)[vars[2], vars[2]] + 2*vcov(mod)[vars[1], vars[2]])
      set(out, i = n, j = "p", value = 2*(1-pnorm(abs(est/se))))
    }
    
    # D_v_R is test of significant difference between the interaction terms
    for(n in which(out$test %in% "M_v_H")){
      vars <- paste0(out$treatment[n], c(":edu3Medium", ":edu3High"))
      top <- coef(mod)[vars[1]] - coef(mod)[vars[2]]
      se <- sqrt(vcov(mod)[vars[1], vars[1]] + vcov(mod)[vars[2], vars[2]] - 2*vcov(mod)[vars[1], vars[2]])
      set(out, i = n, j = "p", value = 2*(1 - pnorm(abs(top/se))))
    }
    
    return(out)
  }


# Fitting -----------------------------------------------------------------

edu_fits <- expand.grid(
  "y" = c("immPCA", "imm_neighbors2", "imm_increase2"),
  "samp" = c("Main", "Replication", "Pooled")
  )
setDT(edu_fits)

edu_fits[ , fit := Map(allCovEdufits, y, samp, chatty = F)]
edu_me<- edu_fits[ , edu_ME_extract(mod = fit[[1]]), by = .(y, samp)]
edu_p <- edu_fits[ , edu_pval_extract(fit[[1]]), by = .(y, samp)]


# Reporting ---------------------------------------------------------------

p_print <- dcast(edu_p, samp + y + treatment ~ test, value.var = "p")

p_print[ , y := fcase(
  y == "immPCA", "Index",
  y == "imm_neighbors2", "Neighbors",
  y == "imm_increase2", "Increase"
)]

p_print[ , treatment := fcase(
  treatment == "NM", "Norms",
  treatment == "CH", "Common Humanity",
  treatment == "CS", "Countering Stereotypes"
)]

print(
  xtable(
    p_print[ , .(y, treatment, L_v_0, M_v_0, H_v_0, L_v_M, L_v_H, M_v_H)],
    caption = "P-values from Education Heterogeneity Analysis",
    align = rep(c("l", "c"), times = c(3, 6))
  ), 
  include.rownames = FALSE,
  booktabs = T
)
